* Session settings so the script runs unattended.
* Do not stop at each screenful of output.
set more off 
* With pause off, the -pause- command at the end of this file does not halt execution.
pause off
* Write any log opened in this session as plain text rather than SMCL.
set logtype text
* NOTE(review): -set mem- is presumably obsolete on Stata 12 and later, which
* -import excel- below requires; confirm and remove if so.
set mem 500M

*************** DESCRIPTION ******************************************
* Loads and reformats industry-level datasets from the BEA, including
* Fixed assets 
*	- capital stock, gross investment, depreciation and avg age
*	- chained and current cost
* 	- Equipment, structures, IP and all asset types
*	
* Gross output
* 	- chained and current cost
*	- value added
* 
* The following notation is used:
*	a1_ = industry totals
*
* 	k = stock of private fixed assets (the current cost stock should be the same as krc in FRED data)
* 	i = gross investment
* 	dep = depreciation
* 	age  = average age
* 
* 	p = current cost (appended to the quantity, e.g. kp, ip, depp)
* 	q = chained as of 2009 (appended to the quantity, e.g. kq, iq, depq)
*
* 	st = structures
* 	eq = equipment
* 	ip = intellectual property
* 	all = all investment types
*
* Inputs:
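*	0.raw_inputs/BEA/Section3All_2015.xls: fixed assets (investment) data from BEA
*	0.raw_inputs/BEA/GDPbyInd_VA_1947-2015.xlsx: value added components (operating surplus, compensation, taxes) from BEA
*	0.raw_inputs/BEA/GDPbyInd_GO_1947-2016.xlsx: gross output data from BEA
*	6.Temp/levelkey_bea: Chosen 'levels' of analysis based on BEA segments (presumably built from LevelKey.xls elsewhere)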
*
* Outputs: 
*	2.intermediate/BEA_industry
*****************************************************************


/* ------------------------------------ */ 
/*		DATA LOADING AND FORMATTING		*/
/* ------------------------------------ */


**** LOAD FIXED ASSETS DATA
* For every table number in mylist and every asset type in mylist2, import the
* matching sheet of the fixed assets workbook, reshape it to one row per
* industry and year, aggregate it to the chosen industry segmentation, and
* merge the resulting series into the running file temp.

local mylist  301 302 304 305 307 308 309 
local mylist2 E S I ESI

foreach ii of local mylist {
foreach jj of local mylist2 {

	* Sheet names are the table number, then the asset letters, then Ann
	import excel 0.raw_inputs/BEA/Section3All_2015.xls, sheet("`ii'`jj' Ann") cellrange(B8:BT115) firstrow clear

	rename B ind
	rename C id
	* Keep a single row per series id
	bys id: keep if _n==1

	* The remaining columns carry their header in the variable label; prefix it
	* with y so the columns can be reshaped with the header as the year
	ds ind id, not
	foreach v of var `r(varlist)'{
	   local x : variable label `v'
	   rename `v' y`x'
	}

	reshape long y, i(ind id) j(year)

	drop if ind==""
	* Split the series id by character position
	g field = substr(id, 1, 2)
	g na = substr(id, 3, 1)
	g indcode = substr(id, 4, 4)

	** Keep relevant fields
	compress
	sort ind 
	* Forward slash so the path resolves on every operating system, as with
	* the other paths in this file
	merge m:1 ind using 6.Temp/levelkey_bea
	drop _merge
	keep if keep_ind ==1

	* Aggregate across rows for chosen segmentation
	destring y, replace
	rename y y_ind
	egen y = sum(y_ind),by(field indcode year)
	drop y_ind 
	bys field indcode year: keep if _n==1


	* Quantity: map the table number to the variable stub
	if `ii' == 301 {
		local qty  = "kp"
	}
	else if `ii' == 302 {
		local qty  = "kq"
	}
	else if `ii' == 304 {
		local qty  = "depp"
	}
	else if `ii' == 305 {
		local qty  = "depq"
	}
	else if `ii' == 307 {
		local qty  = "ip"
	}
	else if `ii' == 308 {
		local qty  = "iq"
	}
	else if `ii' == 309 {
		local qty  = "agep"
	}

	* Asset type: map the sheet letters to the variable suffix
	if "`jj'" == "E" {
		local atype = "eq"
	}
	else if "`jj'" == "S" {
		local atype = "st"
	}
	else if "`jj'" == "I" {
		local atype = "ip"
	}
	else if "`jj'" == "ESI" {
		local atype = "all"
	}

	rename y a1_`qty'_`atype'_bea

	drop field na id 

	* The first sheet processed starts the running file; every later sheet is
	* merged onto what has been accumulated so far
	if "`ii'`jj'" ~= "301E" {
		compress
		sort indcode year 
		merge 1:1 indcode year using temp
		drop _merge
	}

	save temp, replace
}
}


***


**** MERGE VALUE ADDED DATA
* Imports the Components sheet of the value added workbook, reshapes it to one
* row per industry and year with one column per component, and merges it onto
* the fixed assets series accumulated in temp.
import excel 0.raw_inputs/BEA/GDPbyInd_VA_1947-2015.xlsx, sheet("Components") cellrange(B6:AF407) firstrow clear
drop if indcode ==""

* Every column other than name, field and indcode is renamed to y followed by
* its variable label (presumably the year header) so it can be reshaped long
ds name field indcode, not
foreach v of var `r(varlist)'{
   local x : variable label `v'
   rename `v' y`x'
}

reshape long y, i(indcode  field) j(year)
drop name 

* Sum across combined subsegments
destring y, replace
* Rescale by 1/1000; per the original note the source is in millions, so the
* result is presumably in billions
replace y = y/1000 // data in millions
rename y y_ind
egen y = sum(y_ind),by(field indcode year)
drop y_ind 
* Keep one row per component, industry and year now that y holds the group total
bys field indcode year: keep if _n==1

* One column per component: yGOS, yWages, yTaxes
* NOTE(review): the bracketed value list inside j() is unusual syntax; confirm
* it is accepted by the Stata version in use
reshape wide y, i(indcode year) j(field ["GOS" "Wages" "Taxes"])

label variable yGOS "Gross operating surplus"
label variable yWages "Compensation"
label variable yTaxes "Taxes"

rename yGOS a1_gos_bea
rename yWages a1_wages_bea
rename yTaxes a1_taxes_bea

* Combine with the fixed assets series saved in temp
compress
sort indcode year
merge 1:1 indcode year using temp
drop _merge

* Abbreviated names; they resolve to the three a1_*_bea variables created above
destring a1_gos a1_wages a1_taxes, replace
save temp, replace


***


**** MERGE OUTPUT DATA
* Imports the GO sheet of the gross output workbook, reshapes it to one row per
* industry and year, totals it within industry code, and merges it onto temp.
import excel 0.raw_inputs/BEA/GDPbyInd_GO_1947-2016.xlsx, sheet("GO") cellrange(B6:BT108) firstrow clear
drop if indcode ==""

* Rename every data column to y followed by its variable label
ds name indcode , not
foreach col of varlist `r(varlist)' {
	local hdr : variable label `col'
	rename `col' y`hdr'
}

* Discard the columns from y1947 through y1963, then convert to numeric
drop y1947-y1963
destring, replace
reshape long y, i(indcode ) j(year)
drop name

* Sum across combined subsegments
destring y, replace
replace y = y/1000 // data in millions
egen a1_output_bea = sum(y), by(indcode year)
drop y
bysort indcode year: keep if _n==1

label variable a1_output_bea "Gross Output"

* Combine with the series already stored in temp
compress
sort indcode year
merge 1:1 indcode year using temp
drop _merge

** re-organize and save
order indcode ind_short ind year
sort indcode year

* Rows without a short industry name are discarded
drop if ind_short == ""
save temp.dta, replace


***


* COLLAPSE INTO DESIRED INDUSTRY GROUPINGS
use temp, clear

* Aggregate quantity indices
* Each chained series is multiplied by the industry's 2009 current-cost value
* and divided by 100 before summing across industries (presumably converting
* an index with 2009 = 100 into chained 2009 levels - TODO confirm)
foreach qty in k i dep {
	foreach asset in all eq st ip {
		generate p2009_obs = a1_`qty'p_`asset'_bea if year == 2009
		egen p2009_ind = max(p2009_obs), by(ind)
		replace a1_`qty'q_`asset'_bea = a1_`qty'q_`asset'_bea * p2009_ind / 100
		drop p2009_ind p2009_obs
	}
}

* Sum every series within short industry name and year; sicc is averaged
drop ind indcode keep_ind beacode
ds ind_short siccode year, not
local sumvars `r(varlist)'
collapse (sum) `sumvars' (mean) sicc, by(ind_short year)

* Value added components are blanked for 1986 and earlier
foreach comp in a1_gos a1_wages a1_taxes {
	replace `comp' = . if year <= 1986
}

* Operating surplus
generate a1_os_bea = a1_gos - a1_depp_all
label variable a1_os_bea "Net operating surplus"

* Declare the panel so lag operators can be used below
encode ind_short, gen(pcode)
xtset pcode year

* Compute industry-level variables
* For each asset type this builds depreciation, investment and net investment
* rates relative to the lagged capital stock, investment relative to operating
* surplus, and the chained-quantity analogues. Lags rely on the panel
* declaration made earlier in the file.
foreach X in all eq st ip {
	* Compute net investment, investment rates and depreciation rates
	g a1_depk_`X'_bea = a1_depp_`X'_bea/l.a1_kp_`X'_bea
	g a1_nip_`X'_bea = a1_ip_`X'_bea - a1_depp_`X'_bea
	g a1_ik_`X'_bea = a1_ip_`X'_bea/l.a1_kp_`X'_bea
	g a1_nik_`X'_bea = a1_nip_`X'_bea/l.a1_kp_`X'_bea
	g a1_dkp_`X'_bea = a1_kp_`X'_bea - l.a1_kp_`X'_bea
		
	label variable a1_depk_`X'_bea "BEA dep rate - `X'"	
	label variable a1_nip_`X'_bea "BEA Net inv - `X'"
	label variable a1_ik_`X'_bea "BEA Inv rate - `X'"
	label variable a1_nik_`X'_bea "BEA Net inv rate - `X'"
	label variable a1_dkp_`X'_bea "BEA change in capital (curr cost) - `X'"
		
	* Investment relative to net and gross operating surplus
	g a1_iy_`X'_bea = a1_ip_`X'_bea/a1_os_bea
	g a1_niy_`X'_bea = a1_nip_`X'_bea/a1_os_bea
	g a1_nigy_`X'_bea = a1_nip_`X'_bea/a1_gos_bea
	
	* Bound the surplus-based ratios. Stata treats missing values as larger
	* than any number, so the upper caps exclude them explicitly; without
	* that, every observation with a missing ratio would be set to 2.
	replace a1_nigy_`X'_bea = -0.2 if a1_nigy_`X'_bea < -0.2
	replace a1_nigy_`X'_bea = 2 if a1_nigy_`X'_bea > 2 & !missing(a1_nigy_`X'_bea)
	replace a1_niy_`X'_bea = -0.2 if a1_niy_`X'_bea < -0.2
	* NOTE(review): unlike the other two ratios, values above 2 are set to
	* missing here rather than capped at 2 - confirm this is intended
	replace a1_niy_`X'_bea = . if a1_niy_`X'_bea > 2
	replace a1_iy_`X'_bea = -0.2 if a1_iy_`X'_bea < -0.2
	replace a1_iy_`X'_bea = 2 if a1_iy_`X'_bea > 2 & !missing(a1_iy_`X'_bea)
	
	label variable a1_nigy_`X'_bea "BEA I/GOS - `X'"
	label variable a1_niy_`X'_bea "BEA net I/NOS - `X'"
	label variable a1_iy_`X'_bea "BEA I/NOS- `X'"
	
	* Chained-quantity analogues; full variable names are used throughout so
	* the code does not depend on variable abbreviation being enabled
	g a1_depkq_`X'_bea = a1_depq_`X'_bea/l.a1_kq_`X'_bea
	g a1_niq_`X'_bea = a1_iq_`X'_bea-a1_depq_`X'_bea
	g a1_nikq_`X'_bea = a1_niq_`X'_bea/l.a1_kq_`X'_bea
	g a1_dkq_`X'_bea = (a1_kq_`X'_bea - l.a1_kq_`X'_bea)/l.a1_kq_`X'_bea
		
	label variable a1_niq_`X'_bea "BEA Net inv (qty) - `X'"
	label variable a1_nikq_`X'_bea "BEA Net inv rate (qty) - `X'"
	label variable a1_dkq_`X'_bea "BEA change in capital (qty) - `X'"	
}

* Same current-cost rates for all assets excluding intellectual property
g a1_depk_exip_bea = (a1_depp_all_bea-a1_depp_ip_bea)/(l.a1_kp_all_bea-l.a1_kp_ip_bea)

g a1_nip_exip_bea = (a1_ip_all_bea-a1_ip_ip_bea) - (a1_depp_all_bea-a1_depp_ip_bea)
g a1_ik_exip_bea = (a1_ip_all_bea-a1_ip_ip_bea)/(l.a1_kp_all_bea-l.a1_kp_ip_bea)
g a1_nik_exip_bea = a1_nip_exip_bea/(l.a1_kp_all_bea-l.a1_kp_ip_bea)
		
* Net operating surplus relative to the lagged current-cost stock of all assets
g a1_osk_bea = a1_os_bea / l.a1_kp_all_bea


***

* Compute aggregate variables
* Aggregates are by-year sums of the industry series, repeated on every row of
* that year; rates divide by the lagged aggregate stock.
foreach atype in all eq st ip {
	* val is p for the current-cost series and q for the chained series;
	* the chained ratios carry an extra q in their names
	foreach val in p q {
		local rq
		if "`val'" == "q" local rq q

		egen a_k`val'_`atype'_bea = sum(a1_k`val'_`atype'_bea), by(year)
		egen a_i`val'_`atype'_bea = sum(a1_i`val'_`atype'_bea), by(year)
		egen a_dep`val'_`atype'_bea = sum(a1_dep`val'_`atype'_bea), by(year)

		* Net investment, investment rate, depreciation rate, net investment rate
		generate a_ni`val'_`atype'_bea = a_i`val'_`atype'_bea - a_dep`val'_`atype'_bea
		generate a_ik`rq'_`atype'_bea = a_i`val'_`atype'_bea/l.a_k`val'_`atype'_bea
		generate a_depk`rq'_`atype'_bea = a_dep`val'_`atype'_bea/l.a_k`val'_`atype'_bea
		generate a_nik`rq'_`atype'_bea = a_ni`val'_`atype'_bea/l.a_k`val'_`atype'_bea
	}
}

* Current-cost rates for all assets excluding intellectual property
local k_exip_lag (l.a_kp_all_bea-l.a_kp_ip_bea)
generate a_depk_exip_bea = (a_depp_all_bea-a_depp_ip_bea)/`k_exip_lag'
generate a_nip_exip_bea = (a_ip_all_bea-a_ip_ip_bea) - (a_depp_all_bea-a_depp_ip_bea)
generate a_ik_exip_bea = (a_ip_all_bea-a_ip_ip_bea)/`k_exip_lag'
generate a_nik_exip_bea = a_nip_exip_bea/`k_exip_lag'

* Surplus and output totals; the missing option is passed to egen here,
* unlike the stock and flow totals above
foreach series in gos os output {
	egen a_`series'_bea = sum(a1_`series'_bea), by(year) missing
}

generate a_osk_bea = a_os/l.a_kp_all_bea
generate a_iy_bea = a_ip_all_bea/a_os_bea
generate a_niy_bea = a_nip_all_bea/a_os_bea
generate a_nigy_bea = a_nip_all_bea/a_gos_bea

label variable a_nigy_bea "net investment/gross operating surplus"
label variable a_niy_bea "net investment/operating surplus"
label variable a_iy_bea "investment/operating surplus"

label variable a_depk_all_bea "Dep rate"
label variable a_nip_all_bea "Net Inv"
label variable a_ik_all_bea "Inv rate"
label variable a_nik_all_bea "Net inv rate"

* Drop the numeric panel id and write the output file
drop pcode
compress
save 2.intermediate/BEA_industry, replace

* Selected tests from manual replication:
* Each test variable is the gap between a computed value and a reference
* figure for one industry and year; the summary below displays those gaps.
use 2.intermediate/BEA_industry, clear

local oil2015 ind_short == "Min_Oil_and_gas" & year == 2015
local transp2014 ind_short == "Dur_Transp" & year == 2014

generate test1 = (a1_ik_eq_bea - 0.154812) if `oil2015'
generate test2 = (a1_nik_eq_bea - 0.006974) if `oil2015'
generate test3 = a1_ik_all - 0.07931128 if `oil2015'
generate test4 = a1_nik_all - 0.00596529 if `oil2015'
generate test5 = (a1_output_bea - 1172.204) if ind_short == "Health_other" & year == 2015
generate test6 = (a1_output_bea - 1034.445) if ind_short == "Health_hospitals" & year == 2015
generate test7 = (a1_kq_eq_bea - 150.568) if `transp2014'
generate test8 = (a_kp_eq_bea - 4608.5) if `transp2014'
generate test9 = (a_output_bea - 21677.142) if `transp2014'

summarize test*
drop test*
pause


